################################################################################
## setup
################################################################################
# clean slate
rm(list = ls())
date()

# load packages
pkg <- c("foreign",
         "tidyverse",
         "RColorBrewer", 
         "gridExtra",
         "tools", 
         "lfe", 
         "plm", 
         "lmtest", 
         "stargazer", 
         "multcomp")
lapply(pkg, require, character.only = TRUE)

# set directory
MAIN_DIR <- "~/Dropbox/Research/Liao_Newman_Malhotra/replication/"

# set file directories
FIG_DIR <- paste(MAIN_DIR, "figures/", sep = "")
TABLE_DIR <- paste(MAIN_DIR, "tables/", sep = "")
RESULTS <- paste(MAIN_DIR, "results/", sep = "")

# create folder if doesn't exist
if (!dir.exists(FIG_DIR)) {
  print("Directory does not exist! Creating...")
  dir.create(FIG_DIR)
}
if (!dir.exists(TABLE_DIR)) {
  print("Directory does not exist! Creating...")
  dir.create(TABLE_DIR)
}
if (!dir.exists(RESULTS)) {
  print("Directory does not exist! Creating...")
  dir.create(RESULTS)
}

# first time running code?
first.time <- TRUE # fresh run of everything but simulations will take time
#first.time <- FALSE # saves time by loading pre-saved outputs

# load data
load(file = paste(MAIN_DIR, "cces-merge-rep.RData", sep = ""))

# set data
df <- cces.merge.rep


################################################################################
## create var and var name correspondence
################################################################################
var.df <- tibble(var = c("time3",
                         "time2",
                         
                         "log_china_undergraduate",
                         "log_india_undergraduate",
                         "log_china_graduate",
                         
                         "china_inter",
                         "india_inter",
                         "chinagraduate_inter",
                         
                         "china_inter_placebo",
                         "suitability2012_inter",
                         "suitabilitynext", 
                         "suitabilitynext_inter",
                         "suitability2010_inter",
                         "suitability2012_inter_placebo",
                         "suitability2012_india_inter",
                         "suitability2012_china_grad_inter",
                         "log_china_undergraduate_stable",
                         "time3_stable_homeowner",
                         "homeowner_threeway",
                         "log_china_undergraduate_birthyr",
                         "time3_birthyr",
                         "birthyr_threeway",
                         "log_china_undergraduate_educ",
                         "time3_educ",
                         "educ_threeway",
                         "log_china_undergraduate_white",
                         "time3_white",
                         "white_threeway",
                         "log_china_undergraduate_income",
                         "time3_income",
                         "income_threeway",
                         
                         # individual-level controls
                         "birthyr",
                         "educ",
                         "white",
                         "income",
                         "pid3",
                         "ideo",
                         "stable_homeowner",
                         
                         # zcta controls
                         "median_household_income_zcta",
                         "unemploy_zcta",
                         "ln_pop_density_zcta",
                         "share_enroll_zcta",
                         "pop_zcta_tot_thousand",
                         "ln_pop_zcta_tot",
                         "ln_enroll_zcta",
                         "ln_n_establish_zcta",
                         
                         "time3:median_household_income_zcta",
                         "time3:ln_pop_density_zcta",
                         "time3:share_enroll_zcta"),
                 var_name = c("Post-anti-corruption Campaign (2014)",
                              "Pre-anti-corruption Campaign (2012)",
                              
                              "Log Chinese Undergrads in Zip Code",
                              "Log Indian Undergrads in ZIP Code",
                              "Log Chinese Grad Students in ZIP Code",
                              
                              "Log Chinese Undergrads in Zip Code x Post-anti-corruption Campaign (2014)",
                              "Log Indian Undergrads in ZIP Code x Post-anti-corruption Campaign (2014)",
                              "Log Chinese Grad Students in ZIP Code x Post-anti-corruption Campaign (2014)",
                              
                              "Log Chinese Undergrads in ZIP Code x Pre-anti-corruption Campaign (2012)",
                              "Log Chinese Undergrads in Zip Code (2012) x Post-anti-corruption Campaign (2014)",
                              "Log Chinese Undergrads in ZIP Code (Lead)",
                              "Log Chinese Undergrads in ZIP Code (Lead) x Pre-anti-corruption Campaign (2012)",
                              "Log Chinese Undergrads in ZIP Code (2010) x Pre-anti-corruption Campaign (2012)",
                              "Log Chinese Undergrads in ZIP Code (2012) x Pre-anti-corruption Campaign (2012)",
                              "Log Indian Undergrads in ZIP Code (2012) x Post-anti-corruption Campaign (2014)",
                              "Log Chinese Grad Students in ZIP Code (2012) x Post-anti-corruption Campaign (2014)",
                              "Log Chinese Undergrads in ZIP Code x Stable Homeowner",
                              "Post-anti-corruption Campaign (2014) x Stable Homeowner",
                              "Log Chinese Undergrads in ZIP Code x Post-anti-corruption Campaign (2014) x Stable Homeowner",
                              "Log Chinese Undergrads in ZIP Code x Age",
                              "Post-anti-corruption Campaign (2014) x Age",
                              "Log Chinese Undergrads in ZIP Code x Post-anti-corruption Campaign (2014) x Age",
                              "Log Chinese Undergrads in ZIP Code x Education",
                              "Post-anti-corruption Campaign (2014) x Education",
                              "Log Chinese Undergrads in ZIP Code x Post-anti-corruption Campaign (2014) x Education",
                              "Log Chinese Undergrads in ZIP Code x White",
                              "Post-anti-corruption Campaign (2014) x White",
                              "Log Chinese Undergrads in ZIP Code x Post-anti-corruption Campaign (2014) x White",
                              "Log Chinese Undergrads in ZIP Code x Income",
                              "Post-anti-corruption Campaign (2014) x Income",
                              "Log Chinese Undergrads in ZIP Code x Post-anti-corruption Campaign (2014) x Income",
                              
                              # individual-level controls
                              "Age",
                              "Education",
                              "White",
                              "Income",
                              "Party Identification",
                              "Political Ideology",
                              "Stable Homeowner",
                              
                              # zcta controls
                              "Median Household Income (10,000)",
                              "Unemployment Rate",
                              "Log Population Density",
                              "Share of Population Enrolled in College or Above",
                              "Total Population (1,000)",
                              "Log Total Population",
                              "Log Population Enrolled in College or Above",
                              "Log Number of Business Establishments",
                              
                              "Median Household Income (10,000) x Post-anti-corruption Campaign (2014)",
                              "Population Density (log) x Post-anti-corruption Campaign (2014)",
                              "Share Enrolled in College or Above x Post-anti-corruption Campaign (2014)"
                 ))

# function to replace variable names
replaceVarName <- function(var.vec, var.df){
  # Prepare output vector
  out.vec <- rep(NA, length(var.vec))
  matches <- match(var.vec, var.df$var)
  out.vec <- var.df[matches,]$var_name
  
  if(any(is.na(out.vec))){
    warning(paste("Variable concordence missing: ", 
                  paste(var.vec[is.na(out.vec)], collapse = ", "), 
                  sep = ""))
  } else{
    #print("All variables successfully converted")
  }
  
  return(out.vec)
}


################################################################################
## Table S4. DiD Regression Main Results: Anti-immigration Attitudes
################################################################################
# Column (1): 
did.1.a <- plm(scale ~ log_china_undergraduate + time3 + china_inter,
               data = df %>% 
                 dplyr::filter((time == 3 | time == 2) & stay_12_14 == 1), 
               index = c("zcta_10"), 
               method = "within", effect = "individual")
summary(did.1.a)

# compute robust standard errors consistent with Stata:
# xtreg scale log_china_undergraduate time3 china_inter if (time==3|time==2)&stay_12_14==1, fe vce(cluster zcta_10_numeric)
did.1.a.out <- coeftest(did.1.a, function(x) vcovHC(x, type = 'sss'))
did.1.a.out

# Column (2)
did.1.b <- plm(scale ~ time3 + suitability2012_inter,
               data = df %>% 
                 dplyr::filter((time == 3 | time == 2) & stay_12_14 == 1), 
               index = c("zcta_10"), 
               method = "within", effect = "individual")
summary(did.1.b)
did.1.b.out <- coeftest(did.1.b, function(x) vcovHC(x, type = 'sss'))
did.1.b.out

# Column (3)
did.1.a.case <- plm(scale ~ log_china_undergraduate + time3 + china_inter,
                    data = df %>% 
                      dplyr::filter((time == 3 | time == 2) & stay_12_14 == 1), 
                    index = c("case_id"), 
                    method = "within", effect = "individual")
summary(did.1.a.case)
did.1.a.case.out <- coeftest(did.1.a.case, function(x) vcovHC(x, type = 'sss'))
did.1.a.case.out

# Column (4)
did.1.b.case <- plm(scale ~ time3 + suitability2012_inter,
                    data = df %>% 
                      dplyr::filter((time == 3 | time == 2) & stay_12_14 == 1), 
                    index = c("case_id"), 
                    method = "within", effect = "individual")
summary(did.1.b.case)
did.1.b.case.out <- coeftest(did.1.b.case, function(x) vcovHC(x, type = 'sss'))
did.1.b.case.out

# set models
models.cces.main <- list(did.1.a, 
                         did.1.b,
                         did.1.a.case,
                         did.1.b.case)

se.cces.main <- list(did.1.a.out[,2], 
                     did.1.b.out[,2],
                     did.1.a.case.out[,2], 
                     did.1.b.case.out[,2])

pvalue.cces.main <- list(round(did.1.a.out[,4], 3), 
                         round(did.1.b.out[,4], 3),
                         round(did.1.a.case.out[,4], 3),
                         round(did.1.b.case.out[,4], 3))

ci.cces.main <- list(confint(did.1.a.out), confint(did.1.b.out),
                     confint(did.1.a.case.out), confint(did.1.b.case.out))

# set var order
var.order.cces.main <- c("^log_china_undergraduate$",
                         "^time3$",
                         "^china_inter$",
                         "^suitability2012_inter$")

# set var label
var.label.cces.main <- str_replace_all(var.order.cces.main, "\\^", "")
var.label.cces.main <- str_replace_all(var.label.cces.main, "\\$", "")

# save
# NOTE that a bug in stargazer causes it to use default p-values instead of 
# replacing them with the p-values we have calculated above. Results are 
# usually the same up to two decimal points. When there is a slight difference, 
# we report in the SI the p-values calculated above. 
sink(file.path(TABLE_DIR, "Table-S4-cces-main.txt"))
#sink(file.path(TABLE_DIR, "Table-S4-cces-main.tex"))
stargazer(models.cces.main,
          se = se.cces.main,
          p = pvalue.cces.main, # does not seem to work under report = ("vcsp"), corrected in SI
          p.auto = FALSE,
          t.auto = FALSE,
          ci = TRUE,
          ci.custom = ci.cces.main,
          report = ("vcsp"),
          type = "text",
          #type = "latex",
          label = "tb:cces-main",
          font.size = "footnotesize",
          single.row = FALSE,
          title = "DiD Regression Main Results: Anti-immigration Attitudes",
          dep.var.labels = "2012--2014 Anti-immigration Attitudes",
          model.numbers = TRUE,
          order = var.order.cces.main,
          covariate.labels = replaceVarName(var.vec = var.label.cces.main,
                                            var.df = var.df),
          #column.labels = rep("Scale", 4),
          add.lines = list(c("Fixed Effects: ZIP Code Tabulation Area (ZCTA)",
                             rep("Yes", 2), rep("No", 2)),
                           c("Fixed Effects: Respondent",
                             rep("No", 2), rep("Yes", 2)),
                           c("Group Size", 
                             formatC(pdim(did.1.a)$nT$n, big.mark = ","), 
                             formatC(pdim(did.1.b)$nT$n, big.mark = ","),
                             formatC(pdim(did.1.a.case)$nT$n, big.mark = ","), 
                             formatC(pdim(did.1.b.case)$nT$n, big.mark = ","))),
          omit.stat = c("f", "adj.rsq"),
          digits = 2,
          star.cutoffs = c(0.1, 0.05, 0.01),
          star.char = c("*", "**", "***"),
          #notes = c("Robust standard errors clustered by ZIP code in parentheses. $^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01."),
          notes = c("95\\% confidence intervals and p-values are presented. Calculations are based on robust standard errors clustered by ZIP code. Models in columns (3)/(4) produce virtually identical estimates to those in (1)/(2)."),
          notes.align = "l",
          notes.append = FALSE)
sink()


################################################################################
## Table S5. DiD Regression Results: Time Placebo Tests
################################################################################
# Column (1)
did.2.a <- plm(scale ~ suitabilitynext + time2 + suitabilitynext_inter,
               data = df %>% 
                 dplyr::filter((time == 2 | time == 1) & stay_10_12 == 1), 
               index = c("zcta_10"),
               method = "within", effect = "individual")
summary(did.2.a)
did.2.a.out <- coeftest(did.2.a, function(x) vcovHC(x, type = 'sss'))
did.2.a.out

# Column (2)
did.2.b <- plm(scale ~ time2 + suitability2012_inter_placebo,
               data = df %>% 
                 dplyr::filter((time == 2 | time == 1) & stay_10_12 == 1), 
               index = c("zcta_10"),
               method = "within", effect = "individual")
summary(did.2.b)
did.2.b.out <- coeftest(did.2.b, function(x) vcovHC(x, type = 'sss'))
did.2.b.out

# Column (3)
did.2.c <- plm(scale ~ log_china_undergraduate + time2 + china_inter_placebo,
               data = df %>% 
                 dplyr::filter((time == 2 | time == 1) & stay_10_12 == 1), 
               index = c("zcta_10"),
               method = "within", effect = "individual")
summary(did.2.c)
did.2.c.out <- coeftest(did.2.c, function(x) vcovHC(x, type = 'sss'))
did.2.c.out

# Column (4)
did.2.d <- plm(scale ~ time2 + suitability2010_inter,
               data = df %>% 
                 dplyr::filter((time == 2 | time == 1) & stay_10_12 == 1), 
               index = c("zcta_10"),
               method = "within", effect = "individual")
summary(did.2.d)
did.2.d.out <- coeftest(did.2.d, function(x) vcovHC(x, type = 'sss'))
did.2.d.out

# set models
models.time.placebo <- list(did.2.a, 
                            did.2.b, 
                            did.2.c, 
                            did.2.d)

se.time.placebo <- list(did.2.a.out[,2], 
                        did.2.b.out[,2],
                        did.2.c.out[,2], 
                        did.2.d.out[,2])

pvalue.time.placebo <- list(round(did.2.a.out[,4], 3),
                            round(did.2.b.out[,4], 3),
                            round(did.2.c.out[,4], 3),
                            round(did.2.d.out[,4], 3))

ci.time.placebo <- list(confint(did.2.a.out), 
                        confint(did.2.b.out),
                        confint(did.2.c.out), 
                        confint(did.2.d.out))

# set var order
var.order.time.placebo <- c("^suitabilitynext$",
                            "^time2$",
                            "^suitabilitynext_inter$",
                            "^suitability2012_inter_placebo$",
                            "^log_china_undergraduate$",
                            "^china_inter_placebo$",
                            "^suitability2010_inter$")
                         
# set var label
var.label.time.placebo <- str_replace_all(var.order.time.placebo, "\\^", "")
var.label.time.placebo <- str_replace_all(var.label.time.placebo, "\\$", "")

# save
# NOTE that a bug in stargazer causes it to use default p-values instead of 
# replacing them with the p-values we have calculated above. Results are 
# usually the same up to two decimal points. When there is a slight difference, 
# we report in the SI the p-values calculated above. 
sink(file.path(TABLE_DIR, "Table-S5-cces-time-placebo.txt"))
#sink(file.path(TABLE_DIR, "Table-S5-cces-time-placebo.tex"))
stargazer(models.time.placebo,
          se = se.time.placebo,
          p = pvalue.time.placebo, # does not seem to work under report = ("vcsp"), corrected in SI
          p.auto = FALSE,
          t.auto = FALSE,
          ci = TRUE,
          ci.custom = ci.time.placebo,
          report = ('vcsp'),
          type = "text",
          #type = "latex",
          label = "tb:cces-time",
          font.size = "footnotesize",
          single.row = FALSE,
          title = "DiD Regression Results: Time Placebo Tests",
          dep.var.labels = "2010--2012 Anti-immigration Attitudes",
          model.numbers = TRUE,
          order = var.order.time.placebo,
          covariate.labels = replaceVarName(var.vec = var.label.time.placebo,
                                            var.df = var.df),
          add.lines = list(c("Fixed Effects: ZIP Code Tabulation Area (ZCTA)",
                             rep("Yes", 4)),
                           c("Group Size", 
                             formatC(pdim(did.2.a)$nT$n, big.mark = ","), 
                             formatC(pdim(did.2.b)$nT$n, big.mark = ","),
                             formatC(pdim(did.2.c)$nT$n, big.mark = ","), 
                             formatC(pdim(did.2.d)$nT$n, big.mark = ","))),
          omit.stat = c("f", "adj.rsq"),
          digits = 2,
          star.cutoffs = c(0.1, 0.05, 0.01),
          star.char = c("*", "**", "***"),
          #notes = c("Robust standard errors clustered by ZIP code in parentheses. $^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01"),
          notes = c("95\\% confidence intervals and p-values are presented. Calculations are based on robust standard errors clustered by ZIP code."),
          notes.align = "l",
          notes.append = FALSE)
sink()


################################################################################
## Table S6. DiD Regression Results: Origin Placebo Tests
################################################################################
# Column (1)
did.3.a <- plm(scale ~ log_india_undergraduate + time3 + india_inter,
               data = df %>% 
                 dplyr::filter((time == 3 | time == 2) & stay_12_14 == 1), 
               index = c("zcta_10"),
               method = "within", effect = "individual")
summary(did.3.a)
did.3.a.out <- coeftest(did.3.a, function(x) vcovHC(x, type = 'sss'))
did.3.a.out

# Column (2)
did.3.b <- plm(scale ~ log_china_graduate + time3 + chinagraduate_inter,
               data = df %>% 
                 dplyr::filter((time == 3 | time == 2) & stay_12_14 == 1), 
               index = c("zcta_10"),
               method = "within", effect = "individual")
summary(did.3.b)
did.3.b.out <- coeftest(did.3.b, function(x) vcovHC(x, type = 'sss'))
did.3.b.out

# Column (3)
did.3.c <- plm(scale ~ time3 + suitability2012_india_inter,
               data = df %>% 
                 dplyr::filter((time == 3 | time == 2) & stay_12_14 == 1), 
               index = c("zcta_10"),
               method = "within", effect = "individual")
summary(did.3.c)
did.3.c.out <- coeftest(did.3.c, function(x) vcovHC(x, type = 'sss'))
did.3.c.out

# Column (4)
did.3.d <- plm(scale ~ time3 + suitability2012_china_grad_inter,
               data = df %>% 
                 dplyr::filter((time == 3 | time == 2) & stay_12_14 == 1),
               index = c("zcta_10"), 
               method = "within", effect = "individual")
summary(did.3.d)
did.3.d.out <- coeftest(did.3.d, function(x) vcovHC(x, type = 'sss'))
did.3.d.out

# set models
models.origin.placebo <- list(did.3.a, 
                              did.3.b, 
                              did.3.c, 
                              did.3.d)

se.origin.placebo <- list(did.3.a.out[,2], 
                          did.3.b.out[,2],
                          did.3.c.out[,2], 
                          did.3.d.out[,2])

pvalue.origin.placebo <- list(round(did.3.a.out[,4], 3),
                              round(did.3.b.out[,4], 3),
                              round(did.3.c.out[,4], 3),
                              round(did.3.d.out[,4], 3))

ci.origin.placebo <- list(confint(did.3.a.out), 
                          confint(did.3.b.out),
                          confint(did.3.c.out), 
                          confint(did.3.d.out))

# set var order
var.order.origin.placebo <- c("^log_india_undergraduate$",
                              "^time3$",
                              "^india_inter$",
                              "^log_china_graduate$",
                              "^chinagraduate_inter$",
                              "^suitability2012_india_inter$",
                              "^suitability2012_china_grad_inter$")

# set var label
var.label.origin.placebo <- str_replace_all(var.order.origin.placebo, "\\^", "")
var.label.origin.placebo <- str_replace_all(var.label.origin.placebo, "\\$", "")

# save
# NOTE that a bug in stargazer causes it to use default p-values instead of 
# replacing them with the p-values we have calculated above. Results are 
# usually the same up to two decimal points. When there is a slight difference, 
# we report in the SI the p-values calculated above. 
sink(file.path(TABLE_DIR, "Table-S6-cces-origin-placebo.txt"))
#sink(file.path(TABLE_DIR, "Table-S6-cces-origin-placebo.tex"))
stargazer(models.origin.placebo,
          se = se.origin.placebo,
          p = pvalue.origin.placebo, # does not seem to work under report = ("vcsp"), corrected in SI
          p.auto = FALSE,
          t.auto = FALSE,
          ci = TRUE,
          ci.custom = ci.origin.placebo,
          report = ('vcsp'),
          type = "text",
          #type = "latex",
          label = "tb:cces-origin",
          font.size = "footnotesize",
          single.row = FALSE,
          title = "DiD Regression Results: Origin Placebo Tests",
          dep.var.labels = "2012--2014 Anti-immigration Attitudes",
          model.numbers = TRUE,
          order = var.order.origin.placebo,
          covariate.labels = replaceVarName(var.vec = var.label.origin.placebo,
                                            var.df = var.df),
          add.lines = list(c("Fixed Effects: ZIP Code Tabulation Area (ZCTA)",
                             rep("Yes", 4)),
                           c("Group Size", 
                             formatC(pdim(did.3.a)$nT$n, big.mark = ","), 
                             formatC(pdim(did.3.b)$nT$n, big.mark = ","),
                             formatC(pdim(did.3.c)$nT$n, big.mark = ","), 
                             formatC(pdim(did.3.d)$nT$n, big.mark = ","))),
          omit.stat = c("f", "adj.rsq"),
          digits = 2,
          digits.extra = 3,
          star.cutoffs = c(0.1, 0.05, 0.01),
          star.char = c("*", "**", "***"),
          #notes = c("Robust standard errors clustered by ZIP code in parentheses. $^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01"),
          notes = c("95\\% confidence intervals and p-values are presented. Calculations are based on robust standard errors clustered by ZIP code."),
          notes.align = "l",
          notes.append = FALSE)
sink()


################################################################################
## Table S7. Heterogeneous Treatment Effects by Homeownership
################################################################################
# Column (1)
did.4.a <- plm(scale ~ log_china_undergraduate + time3 + china_inter + 
                 birthyr +
                 educ + 
                 white + 
                 income,
               data = df %>% 
                 dplyr::filter((time == 3 | time == 2) & stay_12_14 == 1), 
               index = c("zcta_10"), 
               method = "within", effect = "individual")
summary(did.4.a)
did.4.a.out <- coeftest(did.4.a, function(x) vcovHC(x, type = 'sss'))
did.4.a.out

# Column (2)
did.4.b <- plm(scale ~ log_china_undergraduate + time3 + china_inter + 
                 stable_homeowner + 
                 log_china_undergraduate_stable + 
                 time3_stable_homeowner + homeowner_threeway +
                 birthyr + log_china_undergraduate_birthyr + time3_birthyr + birthyr_threeway +
                 educ + log_china_undergraduate_educ + time3_educ + educ_threeway +
                 white + log_china_undergraduate_white + time3_white + white_threeway +
                 income + log_china_undergraduate_income + time3_income + income_threeway,
               data = df %>% 
                 dplyr::filter((time == 3 | time == 2) & stay_12_14 == 1), 
               index = c("zcta_10"), 
               method = "within", effect = "individual")
summary(did.4.b)
did.4.b.out <- coeftest(did.4.b, function(x) vcovHC(x, type = 'sss'))
did.4.b.out

# fit lfe object to get coefs for simulation
did.4.b.lfe <- felm(scale ~ log_china_undergraduate + time3 + china_inter +
                      stable_homeowner +
                      log_china_undergraduate_stable +
                      time3_stable_homeowner + homeowner_threeway +
                      birthyr + log_china_undergraduate_birthyr + time3_birthyr + birthyr_threeway +
                      educ + log_china_undergraduate_educ + time3_educ + educ_threeway +
                      white + log_china_undergraduate_white + time3_white + white_threeway +
                      income + log_china_undergraduate_income + time3_income + income_threeway
                    | zcta_10 | 0 | zcta_10, # FEs | IVs | Clustered SEs
                    data = df %>%
                      dplyr::filter((time == 3 | time == 2) & stay_12_14 == 1),
                    keepX = TRUE)
summary(did.4.b.lfe)

# set models
models.cces.hetero <- list(did.4.a, 
                           did.4.b)

se.cces.hetero <- list(did.4.a.out[,2], 
                       did.4.b.out[,2])

pvalue.cces.hetero <- list(round(did.4.a.out[,4], 3),
                           round(did.4.b.out[,4], 3))

ci.cces.hetero <- list(confint(did.4.a.out), 
                       confint(did.4.b.out))

# set var order
var.order.cces.hetero <- c("^log_china_undergraduate$",
                           "^time3$",
                           "^china_inter$",
                           "^stable_homeowner$",
                           "^log_china_undergraduate_stable$",
                           "^time3_stable_homeowner$",
                           "^homeowner_threeway$",
                           "^birthyr$",
                           "^log_china_undergraduate_birthyr$",
                           "^time3_birthyr$",
                           "^birthyr_threeway$",
                           "^educ$",
                           "^log_china_undergraduate_educ$",
                           "^time3_educ$",
                           "^educ_threeway$",
                           "^white$",
                           "^log_china_undergraduate_white$",
                           "^time3_white$",
                           "^white_threeway$",
                           "^income$",
                           "^log_china_undergraduate_income$",
                           "^time3_income$",
                           "^income_threeway$")

# set var label
var.label.cces.hetero <- str_replace_all(var.order.cces.hetero, "\\^", "")
var.label.cces.hetero <- str_replace_all(var.label.cces.hetero, "\\$", "")

# save
# NOTE that a bug in stargazer causes it to use default p-values instead of 
# replacing them with the p-values we have calculated above. Results are 
# usually the same up to two decimal points. When there is a slight difference, 
# we report in the SI the p-values calculated above.
sink(file.path(TABLE_DIR, "Table-S7-cces-hetero.txt"))
#sink(file.path(TABLE_DIR, "Table-S7-cces-hetero.tex"))
stargazer(models.cces.hetero,
          se = se.cces.hetero,
          p = pvalue.cces.hetero, # does not seem to work under report = ("vcsp"), corrected in SI
          p.auto = FALSE,
          t.auto = FALSE,
          ci = TRUE,
          ci.custom = ci.cces.hetero,
          report = ("vcsp"),
          type = "text",
          #type = "latex",
          label = "tb:cces-hetero",
          font.size = "footnotesize",
          single.row = FALSE,
          title = "DiD Regression Results: Origin Placebo Tests",
          dep.var.labels = "2012--2014 Anti-immigration Attitudes",
          model.numbers = TRUE,
          order = var.order.cces.hetero,
          covariate.labels = replaceVarName(var.vec = var.label.cces.hetero,
                                            var.df = var.df),
          add.lines = list(c("Fixed Effects: ZIP Code Tabulation Area (ZCTA)",
                             rep("Yes", 2)),
                           c("Group Size", 
                             formatC(pdim(did.4.a)$nT$n, big.mark = ","), 
                             formatC(pdim(did.4.b)$nT$n, big.mark = ","))),
          omit.stat = c("f", "adj.rsq"),
          digits = 2,
          digits.extra = 3,
          star.cutoffs = c(0.1, 0.05, 0.01),
          star.char = c("*", "**", "***"),
          #notes = c("Robust standard errors clustered by ZIP code in parentheses. $^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01"),
          notes = c("95\\% confidence intervals and p-values are presented. Calculations are based on robust standard errors clustered by ZIP code."),
          notes.align = "l",
          notes.append = FALSE)
sink()


################################################################################
## subset data to analyze the effect of exposure on local economic outcomes
## unit of analysis: zcta-year
################################################################################
# main sample 
df.stay.12.14 <- df %>% 
  dplyr::filter((time == 3 | time == 2) & stay_12_14 == 1) %>% 
  group_by(zcta_10, year) %>%
  slice(1) %>%
  dplyr::select(zcta_10, year, time2, time3,
                employ_zcta_100, median_household_income_zcta, n_establish_zcta, 
                #new_vehicle_total,
                log_china_undergraduate, suitability2012_inter, china_inter,
                suitabilitynext, suitabilitynext_inter, suitability2012_inter_placebo,
                log_india_undergraduate, india_inter,
                log_china_graduate, chinagraduate_inter,
                ln_pop_density_zcta, 
                pop_zcta_tot, pop_zcta_tot_thousand, 
                share_enroll_zcta, ln_enroll_zcta) %>%
  mutate(ln_n_establish_zcta = log(n_establish_zcta),
         ln_pop_zcta_tot = log(pop_zcta_tot_thousand + 1)#,
         #ln_n_vehicle_zcta = log(new_vehicle_total)
         )


################################################################################
## Table S8. DiD Regression Results: Employment Rates in CCES Respondents’ ZIP Codes
################################################################################
# Column (1)
did.employ.a <- plm(employ_zcta_100 ~ log_china_undergraduate + time3 + china_inter,
                    data = df.stay.12.14, 
                    index = c("zcta_10"),
                    method = "within", effect = "individual")
summary(did.employ.a)
did.employ.a.out <- coeftest(did.employ.a, function(x) vcovHC(x, type = 'sss'))
did.employ.a.out

# Column (2)
did.employ.b <- plm(employ_zcta_100 ~ time3 + suitability2012_inter,
                    data = df.stay.12.14,  
                    index = c("zcta_10"),
                    method = "within", effect = "individual")
summary(did.employ.b)
did.employ.b.out <- coeftest(did.employ.b, function(x) vcovHC(x, type = 'sss'))
did.employ.b.out

# Column (3)
did.employ.c <- plm(employ_zcta_100 ~ log_china_undergraduate + time3 + china_inter +
                      ln_pop_density_zcta + 
                      pop_zcta_tot_thousand +
                      share_enroll_zcta +
                      ln_enroll_zcta, 
                    data = df.stay.12.14, 
                    index = c("zcta_10"),
                    method = "within", effect = "individual")
summary(did.employ.c)
did.employ.c.out <- coeftest(did.employ.c, function(x) vcovHC(x, type = 'sss'))
did.employ.c.out

# fit lfe object to get coefs for simulation
did.employ.c.lfe <- felm(employ_zcta_100 ~ log_china_undergraduate + time3 + china_inter +
                           ln_pop_density_zcta + 
                           pop_zcta_tot_thousand +
                           share_enroll_zcta +
                           ln_enroll_zcta
                         | zcta_10 | 0 | zcta_10, # FEs | IVs | Clustered SEs
                         data = df.stay.12.14,
                         keepX = TRUE)
summary(did.employ.c.lfe)

# set models
models.zcta.employ <- list(did.employ.a, 
                           did.employ.b,
                           did.employ.c)

# set SEs
se.zcta.employ <- list(did.employ.a.out[,2], 
                       did.employ.b.out[,2],
                       did.employ.c.out[,2])

# set p-values
pvalue.zcta.employ <- list(round(did.employ.a.out[,4], 3),
                           round(did.employ.b.out[,4], 3),
                           round(did.employ.c.out[,4], 3))

# set CIs
ci.zcta.employ <- list(confint(did.employ.a.out), 
                       confint(did.employ.b.out),
                       confint(did.employ.c.out))

# set var order
var.order.employ <- c("^log_china_undergraduate$",
                      "^time3$",
                      "^china_inter$",
                      "^suitability2012_inter$",
                      
                      # zcta controls
                      "^ln_pop_density_zcta$",
                      "^pop_zcta_tot_thousand$",
                      "^share_enroll_zcta$",
                      "^ln_enroll_zcta$")

# set var label
var.label.employ <- str_replace_all(var.order.employ, "\\^", "")
var.label.employ <- str_replace_all(var.label.employ, "\\$", "")

# save results
# NOTE that a bug in stargazer causes it to use default p-values instead of 
# replacing them with the p-values we have calculated above. Results are 
# usually the same up to two decimal points. When there is a slight difference, 
# we report in the SI the p-values calculated above.
sink(file.path(TABLE_DIR, "Table-S8-cces-actual-employ.txt"))
#sink(file.path(TABLE_DIR, "Table-S8-cces-actual-employ.tex"))
stargazer(models.zcta.employ,
          se = se.zcta.employ,
          p = pvalue.zcta.employ, # does not seem to work under report = ("vcsp"), corrected in SI
          p.auto = FALSE,
          t.auto = FALSE,
          ci = TRUE,
          ci.custom = ci.zcta.employ,
          report = ("vcsp"),
          digits = 3, 
          type = "text",
          #type = "latex",
          label = "tb:did-cces-actual-employ",
          font.size = "scriptsize",
          single.row = FALSE,
          title = "Chinese FREI Exposure and Employment Rates in CCES Respondent's ZIP-Codes",
          dep.var.labels = "2012-2014 ZIP-Code Employment Rates (percentage points)",
          model.numbers = TRUE,
          order = var.order.employ,
          covariate.labels = replaceVarName(var.vec = var.label.employ,
                                            var.df = var.df),
          add.lines = list(c("Fixed Effects: ZIP Code Tabulation Area (ZCTA)",
                             rep("Yes", 6)),
                           c("Number of ZIP Codes", 
                             formatC(pdim(did.employ.a)$nT$n, big.mark = ","),
                             formatC(pdim(did.employ.b)$nT$n, big.mark = ","),
                             formatC(pdim(did.employ.c)$nT$n, big.mark = ","))),
          omit.stat = c("f", "adj.rsq"),
          star.cutoffs = c(0.1, 0.05, 0.01),
          star.char = c("*", "**", "***"),
          #notes = c("Robust standard errors clustered by ZIP code in parentheses. $^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01"),
          notes = c("95\\% confidence intervals and p-values are presented. Calculations are based on robust standard errors clustered by ZIP code."),
          notes.align = "l",
          notes.append = FALSE)
sink()


################################################################################
## Table S9. DiD Regression Results: Median Household Income in CCES Respondents’ ZIP Codes
################################################################################
# check distribution of outcome
summary(df.stay.12.14$median_household_income_zcta)
hist(df.stay.12.14$median_household_income_zcta)

# Column (1)
did.medincome.a <- plm(median_household_income_zcta ~ log_china_undergraduate + 
                         time3 + china_inter,
                       data = df.stay.12.14, 
                       index = c("zcta_10"),
                       method = "within", effect = "individual")
summary(did.medincome.a)
did.medincome.a.out <- coeftest(did.medincome.a, function(x) vcovHC(x, type = 'sss'))
did.medincome.a.out

# Column (2)
did.medincome.b <- plm(median_household_income_zcta ~ time3 + suitability2012_inter,
                       data = df.stay.12.14,  
                       index = c("zcta_10"),
                       method = "within", effect = "individual")
summary(did.medincome.b)
did.medincome.b.out <- coeftest(did.medincome.b, function(x) vcovHC(x, type = 'sss'))
did.medincome.b.out

# Column (3)
did.medincome.c <- plm(median_household_income_zcta ~ log_china_undergraduate + time3 + china_inter + 
                         ln_pop_density_zcta + 
                         pop_zcta_tot_thousand +
                         share_enroll_zcta +
                         ln_enroll_zcta, 
                       data = df.stay.12.14, 
                       index = c("zcta_10"),
                       method = "within", effect = "individual")
summary(did.medincome.c)
did.medincome.c.out <- coeftest(did.medincome.c, function(x) vcovHC(x, type = 'sss'))
did.medincome.c.out

# fit lfe object to get coefs for simulation
did.medincome.c.lfe <- felm(median_household_income_zcta ~ log_china_undergraduate + time3 + china_inter +
                              ln_pop_density_zcta + 
                              pop_zcta_tot_thousand +
                              share_enroll_zcta +
                              ln_enroll_zcta
                            | zcta_10 | 0 | zcta_10, # FEs | IVs | Clustered SEs
                            data = df.stay.12.14,
                            keepX = TRUE)
summary(did.medincome.c.lfe)

# set models
models.zcta.medincome <- list(did.medincome.a, 
                              did.medincome.b,
                              did.medincome.c)

# set SEs
se.zcta.medincome <- list(did.medincome.a.out[,2], 
                          did.medincome.b.out[,2],
                          did.medincome.c.out[,2])

# set p-values
pvalue.zcta.medincome <- list(round(did.medincome.a.out[,4], 3),
                              round(did.medincome.b.out[,4], 3),
                              round(did.medincome.c.out[,4], 3))

# set CIs
ci.zcta.medincome <- list(confint(did.medincome.a.out), 
                          confint(did.medincome.b.out),
                          confint(did.medincome.c.out))

# set var order
var.order.medincome <- c("^log_china_undergraduate$",
                         "^time3$",
                         "^china_inter$",
                         "^suitability2012_inter$",
                         
                         # zcta controls
                         "^ln_pop_density_zcta$",
                         "^pop_zcta_tot_thousand$",
                         "^share_enroll_zcta$",
                         "^ln_enroll_zcta$")

# set var label
var.label.medincome <- str_replace_all(var.order.medincome, "\\^", "")
var.label.medincome <- str_replace_all(var.label.medincome, "\\$", "")

# save results
# NOTE that a bug in stargazer causes it to use default p-values instead of 
# replacing them with the p-values we have calculated above. Results are 
# usually the same up to two decimal points. When there is a slight difference, 
# we report in the SI the p-values calculated above.
sink(file.path(TABLE_DIR, "Table-S9-cces-medincome.txt"))
#sink(file.path(TABLE_DIR, "Table-S9-cces-medincome.tex"))
stargazer(models.zcta.medincome,
          se = se.zcta.medincome,
          p = pvalue.zcta.medincome, # does not seem to work under report = ("vcsp"), corrected in SI
          p.auto = FALSE,
          t.auto = FALSE,
          ci = TRUE,
          ci.custom = ci.zcta.medincome,
          report = ("vcsp"),
          digits = 3, 
          type = "text",
          #type = "latex",
          label = "tb:did-cces-medincome",
          font.size = "scriptsize",
          single.row = FALSE,
          title = "Chinese FREI Exposure and the Median Household Income in CCES Respondent's ZIP-Codes",
          dep.var.labels = "2012-2014 ZIP-Code Median Household Income (10,000)",
          model.numbers = TRUE,
          order = var.order.medincome,
          covariate.labels = replaceVarName(var.vec = var.label.medincome,
                                            var.df = var.df),
          add.lines = list(c("Fixed Effects: ZIP Code Tabulation Area (ZCTA)",
                             rep("Yes", 6)),
                           c("Number of ZIP Codes", 
                             formatC(pdim(did.medincome.a)$nT$n, big.mark = ","),
                             formatC(pdim(did.medincome.b)$nT$n, big.mark = ","),
                             formatC(pdim(did.medincome.c)$nT$n, big.mark = ","))),
          omit.stat = c("f", "adj.rsq"),
          star.cutoffs = c(0.1, 0.05, 0.01),
          star.char = c("*", "**", "***"),
          #notes = c("Robust standard errors clustered by ZIP code in parentheses. $^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01"),
          notes = c("95\\% confidence intervals and p-values are presented. Calculations are based on robust standard errors clustered by ZIP code."),
          notes.align = "l",
          notes.append = FALSE)
sink()


################################################################################
## Table S10. DiD Regression Results: Business Establishments in CCES Respondents’ ZIP Codes
################################################################################
# Column (1)
did.establish.a <- plm(ln_n_establish_zcta ~ log_china_undergraduate + 
                         time3 + china_inter,
                       data = df.stay.12.14, 
                       index = c("zcta_10"),
                       method = "within", effect = "individual")
summary(did.establish.a)
did.establish.a.out <- coeftest(did.establish.a, function(x) vcovHC(x, type = 'sss'))
did.establish.a.out

# Column (2)
did.establish.b <- plm(ln_n_establish_zcta ~ time3 + suitability2012_inter,
                       data = df.stay.12.14, 
                       index = c("zcta_10"),
                       method = "within", effect = "individual")
summary(did.establish.b)
did.establish.b.out <- coeftest(did.establish.b, function(x) vcovHC(x, type = 'sss'))
did.establish.b.out

# Column (3)
did.establish.c <- plm(ln_n_establish_zcta ~ log_china_undergraduate + time3 + china_inter +
                         ln_pop_density_zcta + 
                         pop_zcta_tot_thousand +
                         share_enroll_zcta +
                         ln_enroll_zcta, 
                       data = df.stay.12.14, 
                       index = c("zcta_10"),
                       method = "within", effect = "individual")
summary(did.establish.c)
did.establish.c.out <- coeftest(did.establish.c, function(x) vcovHC(x, type = 'sss'))
did.establish.c.out

# fit lfe object to get coefs for simulation
did.establish.c.lfe <- felm(ln_n_establish_zcta ~ log_china_undergraduate + time3 + china_inter +
                              ln_pop_density_zcta + 
                              pop_zcta_tot_thousand +
                              share_enroll_zcta +
                              ln_enroll_zcta
                            | zcta_10 | 0 | zcta_10, # FEs | IVs | Clustered SEs
                            data = df.stay.12.14,
                            keepX = TRUE)
summary(did.establish.c.lfe)

# set models
models.zcta.establish <- list(did.establish.a, 
                              did.establish.b,
                              did.establish.c)

# set SEs
se.zcta.establish <- list(did.establish.a.out[,2], 
                          did.establish.b.out[,2],
                          did.establish.c.out[,2])

# set p-values
pvalue.zcta.establish <- list(round(did.establish.a.out[,4], 3),
                              round(did.establish.b.out[,4], 3),
                              round(did.establish.c.out[,4], 3))

# set CIs
ci.zcta.establish <- list(confint(did.establish.a.out), 
                          confint(did.establish.b.out),
                          confint(did.establish.c.out))

# set var order
var.order.establish <- c("^log_china_undergraduate$",
                         "^time3$",
                         "^china_inter$",
                         "^suitability2012_inter$",
                         
                         # zcta controls
                         "^ln_pop_density_zcta$",
                         "^pop_zcta_tot_thousand$",
                         "^share_enroll_zcta$",
                         "^ln_enroll_zcta$")

# set var label
var.label.establish <- str_replace_all(var.order.establish, "\\^", "")
var.label.establish <- str_replace_all(var.label.establish, "\\$", "")

# save results
# NOTE that a bug in stargazer causes it to use default p-values instead of 
# replacing them with the p-values we have calculated above. Results are 
# usually the same up to two decimal points. When there is a slight difference, 
# we report in the SI the p-values calculated above.
sink(file.path(TABLE_DIR, "Table-S10-cces-establish.txt"))
#sink(file.path(TABLE_DIR, "Table-S10-cces-establish.tex"))
stargazer(models.zcta.establish,
          se = se.zcta.establish,
          p = pvalue.zcta.establish, # does not seem to work under report = ("vcsp"), corrected in SI
          p.auto = FALSE,
          t.auto = FALSE,
          ci = TRUE,
          ci.custom = ci.zcta.establish,
          report = ("vcsp"),
          digits = 3, 
          type = "text",
          #type = "latex",
          label = "tb:did-cces-establish",
          font.size = "scriptsize",
          single.row = FALSE,
          title = "Chinese FREI Exposure and the Log Number of Business Establishments in CCES Respondent's ZIP-Codes",
          dep.var.labels = "2012-2014 ZIP-Code Log Number of Business Establishments",
          model.numbers = TRUE,
          order = var.order.establish,
          covariate.labels = replaceVarName(var.vec = var.label.establish,
                                            var.df = var.df),
          add.lines = list(c("Fixed Effects: ZIP Code Tabulation Area (ZCTA)",
                             rep("Yes", 6)),
                           c("Number of ZIP Codes", 
                             formatC(pdim(did.establish.a)$nT$n, big.mark = ","),
                             formatC(pdim(did.establish.b)$nT$n, big.mark = ","),
                             formatC(pdim(did.establish.c)$nT$n, big.mark = ","))),
          omit.stat = c("f", "adj.rsq"),
          star.cutoffs = c(0.1, 0.05, 0.01),
          star.char = c("*", "**", "***"),
          #notes = c("Robust standard errors clustered by ZIP code in parentheses. $^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01"),
          notes = c("95\\% confidence intervals and p-values are presented. Calculations are based on robust standard errors clustered by ZIP code."),
          notes.align = "l",
          notes.append = FALSE)
sink()


################################################################################
## Table S11. DiD Regression Results: New Vehicle Registrations in CCES Respondents’ ZIP-Codes
## Note: Due to the authors' data non-disclosure agreement with Hedges & Company, 
## the outcome variable is excluded from the replication dataset. The code below
## loads results analyzed and saved by the authors.
################################################################################
# # check distribution
# names(df.stay.12.14)
# summary(df.stay.12.14$ln_n_vehicle_zcta)
# hist(df.stay.12.14$ln_n_vehicle_zcta)

# # Column (1)
# did.vehicle.a <- plm(ln_n_vehicle_zcta ~ log_china_undergraduate + 
#                        time3 + china_inter,
#                      data = df.stay.12.14, 
#                      index = c("zcta_10"),
#                      method = "within", effect = "individual")
# summary(did.vehicle.a)
# did.vehicle.a.out <- coeftest(did.vehicle.a, function(x) vcovHC(x, type = 'sss'))
# did.vehicle.a.out
# 
# # Column (2)
# did.vehicle.b <- plm(ln_n_vehicle_zcta ~ time3 + suitability2012_inter,
#                      data = df.stay.12.14, 
#                      index = c("zcta_10"),
#                      method = "within", effect = "individual")
# summary(did.vehicle.b)
# did.vehicle.b.out <- coeftest(did.vehicle.b, function(x) vcovHC(x, type = 'sss'))
# did.vehicle.b.out
# 
# # Column (3)
# did.vehicle.c <- plm(ln_n_vehicle_zcta ~ log_china_undergraduate + time3 + china_inter +
#                        ln_pop_density_zcta + 
#                        pop_zcta_tot_thousand +
#                        share_enroll_zcta +
#                        ln_enroll_zcta, 
#                      data = df.stay.12.14, 
#                      index = c("zcta_10"),
#                      method = "within", effect = "individual")
# summary(did.vehicle.c)
# did.vehicle.c.out <- coeftest(did.vehicle.c, function(x) vcovHC(x, type = 'sss'))
# did.vehicle.c.out
# 
# # fit lfe object to get coefs for simulation
# did.vehicle.c.lfe <- felm(ln_n_vehicle_zcta ~ log_china_undergraduate + time3 + china_inter +
#                             ln_pop_density_zcta + 
#                             pop_zcta_tot_thousand +
#                             share_enroll_zcta +
#                             ln_enroll_zcta
#                           | zcta_10 | 0 | zcta_10, # FEs | IVs | Clustered SEs
#                           data = df.stay.12.14,
#                           keepX = TRUE)
# summary(did.vehicle.c.lfe)
# 
# # collect and save because of non-disclosure agreement
# did.vehicle.out <- list(did.vehicle.a = did.vehicle.a,
#                         did.vehicle.b = did.vehicle.b,
#                         did.vehicle.c = did.vehicle.c,
#                         did.vehicle.c.lfe = did.vehicle.c.lfe)
# 
# save(did.vehicle.out, 
#      file = paste(MAIN_DIR, "did-vehicle.RData", sep = ""))

# load saved outputs
load(file = paste(MAIN_DIR, "did-vehicle.RData", sep = ""))

# compute SEs
did.vehicle.a.out <- coeftest(did.vehicle.out$did.vehicle.a, function(x) vcovHC(x, type = 'sss'))
did.vehicle.a.out
did.vehicle.b.out <- coeftest(did.vehicle.out$did.vehicle.b, function(x) vcovHC(x, type = 'sss'))
did.vehicle.b.out
did.vehicle.c.out <- coeftest(did.vehicle.out$did.vehicle.c, function(x) vcovHC(x, type = 'sss'))
did.vehicle.c.out

# set models
models.zcta.vehicle <- list(did.vehicle.out$did.vehicle.a, 
                            did.vehicle.out$did.vehicle.b,
                            did.vehicle.out$did.vehicle.c)

# set SEs
se.zcta.vehicle <- list(did.vehicle.a.out[,2], 
                        did.vehicle.b.out[,2],
                        did.vehicle.c.out[,2])

# set p-values
pvalue.zcta.vehicle <- list(round(did.vehicle.a.out[,4], 3),
                            round(did.vehicle.b.out[,4], 3),
                            round(did.vehicle.c.out[,4], 3))

# set CIs
ci.zcta.vehicle <- list(confint(did.vehicle.a.out), 
                        confint(did.vehicle.b.out),
                        confint(did.vehicle.c.out))

# set var order
var.order.vehicle <- c("^log_china_undergraduate$",
                       "^time3$",
                       "^china_inter$",
                       "^suitability2012_inter$",
                       
                       # zcta controls
                       "^ln_pop_density_zcta$",
                       "^pop_zcta_tot_thousand$",
                       "^share_enroll_zcta$",
                       "^ln_enroll_zcta$")

# set var label
var.label.vehicle <- str_replace_all(var.order.vehicle, "\\^", "")
var.label.vehicle <- str_replace_all(var.label.vehicle, "\\$", "")

# save results
# NOTE that a bug in stargazer causes it to use default p-values instead of 
# replacing them with the p-values we have calculated above. Results are 
# usually the same up to two decimal points. When there is a slight difference, 
# we report in the SI the p-values calculated above.
sink(file.path(TABLE_DIR, "Table-S11-cces-vehicle.txt"))
#sink(file.path(TABLE_DIR, "Table-S11-cces-vehicle.tex"))
stargazer(models.zcta.vehicle,
          se = se.zcta.vehicle,
          p = pvalue.zcta.vehicle, # does not seem to work under report = ("vcsp"), corrected in SI
          p.auto = FALSE,
          t.auto = FALSE,
          ci = TRUE,
          ci.custom = ci.zcta.vehicle,
          report = ("vcsp"),
          digits = 3, 
          type = "text",
          #type = "latex",
          label = "tb:did-cces-vehicle",
          font.size = "scriptsize",
          single.row = FALSE,
          title = "Chinese FREI Exposure and the Log Number of New Vehicle Registrations in CCES Respondent's ZIP-Codes",
          dep.var.labels = "2012-2014 ZIP-Code Log Number of New Vehicle Registrations",
          model.numbers = TRUE,
          order = var.order.vehicle,
          covariate.labels = replaceVarName(var.vec = var.label.vehicle,
                                            var.df = var.df),
          add.lines = list(c("Fixed Effects: ZIP Code Tabulation Area (ZCTA)",
                             rep("Yes", 6)),
                           c("Number of ZIP Codes", 
                             formatC(pdim(did.vehicle.out$did.vehicle.a)$nT$n, big.mark = ","),
                             formatC(pdim(did.vehicle.out$did.vehicle.b)$nT$n, big.mark = ","),
                             formatC(pdim(did.vehicle.out$did.vehicle.c)$nT$n, big.mark = ","))),
          omit.stat = c("f", "adj.rsq"),
          star.cutoffs = c(0.1, 0.05, 0.01),
          star.char = c("*", "**", "***"),
          #notes = c("Robust standard errors clustered by ZIP code in parentheses. $^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01"),
          notes = c("95\\% confidence intervals and p-values are presented. Calculations are based on robust standard errors clustered by ZIP code."),
          notes.align = "l",
          notes.append = FALSE)
sink()


################################################################################
## Simulate effects: Substantive effect of Chinese FREI exposure for homeowners
################################################################################
# 1 sd above/below mean
mean.cn.ug <- df %>%
  dplyr::filter((time == 3 | time == 2) & stay_12_14 == 1) %>%
  pull(log_china_undergraduate) %>%
  mean()
mean.cn.ug

sd.cn.ug <-  df %>%
  dplyr::filter((time == 3 | time == 2) & stay_12_14 == 1) %>%
  pull(log_china_undergraduate) %>%
  sd()
sd.cn.ug

range.key <- c(mean.cn.ug - sd.cn.ug, mean.cn.ug + sd.cn.ug)
range.key

# range of moderator 1 (time3)
range.mod.1 <- df %>%
  dplyr::filter((time == 3 | time == 2) & stay_12_14 == 1) %>%
  pull(time3) %>%
  unique() %>%
  sort()
range.mod.1

# range of moderator 2 (stable_homeowner == 1)
range.mod.2 <- 1
range.mod.2

# set parameters
sim.df <- df %>%
  dplyr::filter((time == 3 | time == 2) & stay_12_14 == 1)
fm <- did.4.b.lfe
change.var <- "log_china_undergraduate"
moderator.1 <- "time3"
moderator.2 <- "stable_homeowner"
twoway.interaction.1 <- "china_inter"
twoway.interaction.2 <- "log_china_undergraduate_stable"
twoway.interaction.3 <- "time3_stable_homeowner"
threeway.interaction <- "homeowner_threeway"
range.key <- range.key
range.mod.1 <- range.mod.1
range.mod.2 <- range.mod.2
n.draws <- 1000
dv.log <- FALSE
range.log <- TRUE

if (first.time){
  # set seed for simulation
  seed <- 1234
  set.seed(seed)
  
  # load library to draw from multivariate normal
  library(MASS)
  vcov.out <- vcovHC(did.4.b, type = 'sss') # use the robust clustered SEs from Stata
  dim(vcov.out)
  vcov.out <- vcov.out[1:dim(vcov.out)[1], 1:dim(vcov.out)[1]]
  beta.draws <- mvrnorm(n.draws, coef(fm), vcov.out)
  dim(beta.draws)
  
  # extract FEs
  fe <- getfe(fm)
  fe.matrix <- t(fe[c("effect")])
  row.names(fe.matrix) <- NULL
  fe.matrix <- fe.matrix[rep(1:nrow(fe.matrix),
                             each = n.draws),]
  
  # combine with betas
  beta.draws <- cbind(beta.draws, fe.matrix)
  
  # extract model matrix
  model.df <- as.data.frame(fm$X)
  
  # combine with FEs
  fe.df <- bind_rows(fm$fe)
  model.df.full <- cbind(model.df, fe.df)
  
  # create dummies for FEs
  model.df.full <- model.df.full %>%
    mutate(new = 1,
           temp = row_number()) %>%
    spread(zcta_10, new, fill = 0) %>%
    dplyr::select(-temp)
  
  # for each value of change var, moderator 1, moderator 2
  sim.out.homeowner.zcta <- map_df(1:length(range.key), function(x){
    
    sim.out.sub <- map_df(1:length(range.mod.1), function(y){
      
      sim.out.sub.sub <- map_df(1:length(range.mod.2), function(z){
        
        # print
        cat(paste("Simulating predicted values for ", x, " of ",
                  length(range.key), " ", change.var, " values when ", moderator.1,
                  " = ", range.mod.1[[y]], " and when ", moderator.2,
                  " = ", range.mod.2[[z]],
                  "\n", sep = ""))
        
        # create temp data
        newd <- model.df.full
        
        if(range.log){
          # replace change var value x
          newd[,change.var] <- range.key[[x]]
          
          # replace moderator.1 value y
          newd[,moderator.1] <- range.mod.1[[y]]
          
          # replace moderator.2 value z
          newd[,moderator.2] <- range.mod.2[[z]]
          
          # calculate new interactions
          newd[,twoway.interaction.1] <- range.key[[x]] * range.mod.1[[y]]
          newd[,twoway.interaction.2] <- range.key[[x]] * range.mod.2[[z]]
          newd[,twoway.interaction.3] <- range.mod.1[[y]] * range.mod.2[[z]]
          newd[,threeway.interaction] <- range.key[[x]] * range.mod.1[[y]] * range.mod.2[[z]]
          
        } else{
          # replace change var value x
          newd[,change.var] <- log(range.key[[x]] + 1)
          
          # replace moderator.1 value y
          newd[,moderator.1] <- range.mod.1[[y]]
          
          # replace moderator.2 value z
          newd[,moderator.2] <- range.mod.2[[z]]
          
          # calculate new interaction
          newd[,twoway.interaction.1] <- log(range.key[[x]] + 1) * range.mod.1[[y]]
          newd[,twoway.interaction.2] <- log(range.key[[x]] + 1) * range.mod.2[[z]]
          newd[,twoway.interaction.3] <- range.mod.1[[y]] * range.mod.2[[z]]
          newd[,threeway.interaction] <- log(range.key[[x]] + 1) * range.mod.1[[y]] * range.mod.2[[z]]
          
        }
        
        # matrix multiplication for systematic component
        sys.com <- as.matrix(newd) %*% t(beta.draws)
        
        # average over observations for each draw
        ev <- as.data.frame(sys.com) %>%
          summarise_all(funs(mean))
        
        if(dv.log){
          # convert to original scale of outcome
          ev <- exp(ev) %>%
            mutate(mod.1 = range.mod.1[[y]],
                   mod.2 = range.mod.2[[z]])
          
          return(ev)
          
        } else{
          ev <- ev %>%
            mutate(mod.1 = range.mod.1[[y]],
                   mod.2 = range.mod.2[[z]])
          
          return(ev)
        }
      })
    })
    
    # add set identifier
    sim.out <- sim.out.sub %>%
      mutate(setx = range.key[[x]])
    
    return(sim.out)
  })
  
  # # check
  # glimpse(sim.out.homeowner.zcta)
  
  # save
  save(sim.out.homeowner.zcta,
       file = paste(RESULTS, "cces-sim-out-homeowner-zcta.RData", sep = ""))
  
} else {
  
  # load
  load(file = paste(RESULTS, "cces-sim-out-homeowner-zcta.RData", sep = ""))
  
}

# calculate DiD when moving from 1 s.d. below to above mean
did.out.homeowner.zcta <- sim.out.homeowner.zcta %>%
  gather(draw, value, -setx, -mod.1, -mod.2) %>%
  spread(mod.1, value) %>%
  mutate(fd = `1` - `0`) %>%
  dplyr::select(setx, draw, fd) %>%
  spread(setx, fd) %>%
  rename(minus_one_sd = 2, 
         plus_one_sd = 3) %>%
  mutate(did = plus_one_sd - minus_one_sd) %>%
  dplyr::summarize(mean = mean(did, na.rm = TRUE),
                   lwr = quantile(did, probs = 0.025),
                   upr = quantile(did, probs = 0.975)) %>%
  ungroup()

# compare effect size to variation in the outcome
did.out.homeowner.zcta # mean -6.43, lower -13.31, upr 0.22
sd(sim.df$scale, na.rm = TRUE) # 40.46
did.out.homeowner.zcta$mean/sd(sim.df$scale, na.rm = TRUE) # -0.16 s.d.


################################################################################
## Simulate effects: Substantive effect of Chinese FREI exposure for non-homeowners
################################################################################
# 1 sd above/below mean
mean.cn.ug <- df %>%
  dplyr::filter((time == 3 | time == 2) & stay_12_14 == 1) %>%
  pull(log_china_undergraduate) %>%
  mean()
mean.cn.ug

sd.cn.ug <-  df %>%
  dplyr::filter((time == 3 | time == 2) & stay_12_14 == 1) %>%
  pull(log_china_undergraduate) %>%
  sd()
sd.cn.ug

range.key <- c(mean.cn.ug - sd.cn.ug, mean.cn.ug + sd.cn.ug)
range.key
exp(range.key)

# range of moderator 1 (time3)
range.mod.1 <- df %>%
  dplyr::filter((time == 3 | time == 2) & stay_12_14 == 1) %>%
  pull(time3) %>%
  unique() %>%
  sort()
range.mod.1

# range of moderator 2 (stable_homeowner == 0)
range.mod.2 <- 0
range.mod.2

# set parameters
sim.df <- df %>%
  dplyr::filter((time == 3 | time == 2) & stay_12_14 == 1)
fm <- did.4.b.lfe
change.var <- "log_china_undergraduate"
moderator.1 <- "time3"
moderator.2 <- "stable_homeowner"
twoway.interaction.1 <- "china_inter"
twoway.interaction.2 <- "log_china_undergraduate_stable"
twoway.interaction.3 <- "time3_stable_homeowner"
threeway.interaction <- "homeowner_threeway"
range.key <- range.key
range.mod.1 <- range.mod.1
range.mod.2 <- range.mod.2
n.draws <- 1000
dv.log <- FALSE
range.log <- TRUE

if (first.time){
  
  # set seed for simulation
  seed <- 1234
  set.seed(seed)
  
  # load library to draw from multivariate normal
  library(MASS)
  vcov.out <- vcovHC(did.4.b, type = 'sss') # use the robust clustered SEs from Stata
  dim(vcov.out)
  vcov.out <- vcov.out[1:dim(vcov.out)[1], 1:dim(vcov.out)[1]]
  beta.draws <- mvrnorm(n.draws, coef(fm), vcov.out)
  dim(beta.draws)
  
  # extract FEs
  fe <- getfe(fm)
  fe.matrix <- t(fe[c("effect")])
  row.names(fe.matrix) <- NULL
  fe.matrix <- fe.matrix[rep(1:nrow(fe.matrix),
                             each = n.draws),]
  
  # combine with betas
  beta.draws <- cbind(beta.draws, fe.matrix)
  
  # extract model matrix
  model.df <- as.data.frame(fm$X)
  
  # combine with FEs
  fe.df <- bind_rows(fm$fe)
  model.df.full <- cbind(model.df, fe.df)
  
  # create dummies for FEs
  model.df.full <- model.df.full %>%
    mutate(new = 1,
           temp = row_number()) %>%
    spread(zcta_10, new, fill = 0) %>%
    dplyr::select(-temp)
  
  # for each value of change var, moderator 1, moderator 2
  sim.out.nonhomeowner.zcta <- map_df(1:length(range.key), function(x){
    
    sim.out.sub <- map_df(1:length(range.mod.1), function(y){
      
      sim.out.sub.sub <- map_df(1:length(range.mod.2), function(z){
        
        # print
        cat(paste("Simulating predicted values for ", x, " of ",
                  length(range.key), " ", change.var, " values when ", moderator.1,
                  " = ", range.mod.1[[y]], " and when ", moderator.2,
                  " = ", range.mod.2[[z]],
                  "\n", sep = ""))
        
        # create temp data
        newd <- model.df.full
        
        if(range.log){
          # replace change var value x
          newd[,change.var] <- range.key[[x]]
          
          # replace moderator.1 value y
          newd[,moderator.1] <- range.mod.1[[y]]
          
          # replace moderator.2 value z
          newd[,moderator.2] <- range.mod.2[[z]]
          
          # calculate new interactions
          newd[,twoway.interaction.1] <- range.key[[x]] * range.mod.1[[y]]
          newd[,twoway.interaction.2] <- range.key[[x]] * range.mod.2[[z]]
          newd[,twoway.interaction.3] <- range.mod.1[[y]] * range.mod.2[[z]]
          newd[,threeway.interaction] <- range.key[[x]] * range.mod.1[[y]] * range.mod.2[[z]]
          
        } else{
          # replace change var value x
          newd[,change.var] <- log(range.key[[x]] + 1)
          
          # replace moderator.1 value y
          newd[,moderator.1] <- range.mod.1[[y]]
          
          # replace moderator.2 value z
          newd[,moderator.2] <- range.mod.2[[z]]
          
          # calculate new interaction
          newd[,twoway.interaction.1] <- log(range.key[[x]] + 1) * range.mod.1[[y]]
          newd[,twoway.interaction.2] <- log(range.key[[x]] + 1) * range.mod.2[[z]]
          newd[,twoway.interaction.3] <- range.mod.1[[y]] * range.mod.2[[z]]
          newd[,threeway.interaction] <- log(range.key[[x]] + 1) * range.mod.1[[y]] * range.mod.2[[z]]
          
        }
        
        # matrix multiplication for systematic component
        sys.com <- as.matrix(newd) %*% t(beta.draws)
        
        # average over observations for each draw
        ev <- as.data.frame(sys.com) %>%
          summarise_all(funs(mean))
        
        if(dv.log){
          # convert to original scale of outcome
          ev <- exp(ev) %>%
            mutate(mod.1 = range.mod.1[[y]],
                   mod.2 = range.mod.2[[z]])
          
          return(ev)
          
        } else{
          ev <- ev %>%
            mutate(mod.1 = range.mod.1[[y]],
                   mod.2 = range.mod.2[[z]])
          
          return(ev)
        }
      })
    })
    
    # add set identifier
    sim.out <- sim.out.sub %>%
      mutate(setx = range.key[[x]])
    
    return(sim.out)
  })
  
  # # check
  # glimpse(sim.out.nonhomeowner.zcta)
  
  # save
  save(sim.out.nonhomeowner.zcta, 
       file = paste(RESULTS, "cces-sim-out-nonhomeowner-zcta.RData", sep = ""))
  
} else {
  
  # load
  load(file = paste(RESULTS, "cces-sim-out-nonhomeowner-zcta.RData", sep = ""))
  
}

# calculate DiD when moving from 1 s.d. below to above mean
did.out.nonhomeowner.zcta <- sim.out.nonhomeowner.zcta %>%
  gather(draw, value, -setx, -mod.1, -mod.2) %>%
  spread(mod.1, value) %>%
  mutate(fd = `1` - `0`) %>%
  dplyr::select(setx, draw, fd) %>%
  spread(setx, fd) %>%
  rename(minus_one_sd = 2, 
         plus_one_sd = 3) %>%
  mutate(did = plus_one_sd - minus_one_sd) %>%
  dplyr::summarize(mean = mean(did, na.rm = TRUE),
                   lwr = quantile(did, probs = 0.025),
                   upr = quantile(did, probs = 0.975)) %>%
  ungroup()

# compare effect size to variation in the outcome
did.out.nonhomeowner.zcta # mean -9.35, lower -15.63, upr -3.32
sd(sim.df$scale, na.rm = TRUE) # 40.46
did.out.nonhomeowner.zcta$mean/sd(sim.df$scale, na.rm = TRUE) # -0.23 s.d.


################################################################################
## Simulate effects: Substantive effect of exposure on ZCTA Employment Rates
################################################################################
# 1 sd above/below mean
mean.cn.ug <- mean(df.stay.12.14$log_china_undergraduate, na.rm = TRUE)
mean.cn.ug

sd.cn.ug <- sd(df.stay.12.14$log_china_undergraduate, na.rm = TRUE)
sd.cn.ug

range.key <- c(mean.cn.ug - sd.cn.ug, mean.cn.ug + sd.cn.ug)
range.key
exp(range.key)

# range of moderator
range.mod <- sort(unique(df.stay.12.14$time3))
range.mod

# set parameters
sim.df <- df.stay.12.14 
fm <- did.employ.c.lfe
change.var <- "log_china_undergraduate"
moderator <- "time3"
interaction <- "china_inter"
range.key <- range.key
range.mod <- range.mod
n.draws <- 1000
dv.log <- FALSE
range.log <- TRUE

if (first.time){
  
  # set seed for simulation
  seed <- 1234
  set.seed(seed)
  
  # load library to draw from multivariate normal
  library(MASS)
  vcov.out <- vcovHC(did.employ.c, type = 'sss') # use the robust clustered SEs from Stata
  dim(vcov.out)
  vcov.out <- vcov.out[1:dim(vcov.out)[1], 1:dim(vcov.out)[1]]
  beta.draws <- mvrnorm(n.draws, coef(fm), vcov.out)
  dim(beta.draws)
  
  # extract FEs
  fe <- getfe(fm)
  fe.matrix <- t(fe[c("effect")])
  row.names(fe.matrix) <- NULL
  fe.matrix <- fe.matrix[rep(1:nrow(fe.matrix),
                             each = n.draws),]
  
  # combine with betas
  beta.draws <- cbind(beta.draws, fe.matrix)
  
  # extract model matrix
  model.df <- as.data.frame(fm$X)
  
  # combine with FEs
  fe.df <- bind_rows(fm$fe)
  model.df.full <- cbind(model.df, fe.df)
  
  # create dummies for FEs
  model.df.full <- model.df.full %>%
    mutate(new = 1,
           temp = row_number()) %>%
    spread(zcta_10, new, fill = 0) %>%
    dplyr::select(-temp)
  
  # for each value of change var
  sim.out.employ.zcta <- map_df(1:length(range.key), function(x){
    
    sim.out.sub <- map_df(1:length(range.mod), function(y){
      # print
      cat(paste("Simulating predicted probabilities for ", x, " of ",
                length(range.key), " ", change.var, " values when ", moderator,
                " = ", range.mod[[y]],
                "\n", sep = ""))
      
      # create temp data
      newd <- model.df.full
      
      if(range.log){
        # replace change var value x
        newd[,change.var] <- range.key[[x]]
        
        # replace moderator value y
        newd[,moderator] <- range.mod[[y]]
        
        # calculate new interaction
        newd[,interaction] <- range.key[[x]]*range.mod[[y]]
        
      } else{
        # replace change var value x
        newd[,change.var] <- log(range.key[[x]] + 1)
        
        # replace moderator value y
        newd[,moderator] <- range.mod[[y]]
        
        # calculate new interaction
        newd[,interaction] <- log(range.key[[x]] + 1)*range.mod[[y]]
        
      }
      
      # matrix multiplication for systematic component
      sys.com <- as.matrix(newd) %*% t(beta.draws)
      
      # average over observations for each draw
      ev <- as.data.frame(sys.com) %>%
        summarise_all(funs(mean))
      
      if(dv.log){
        # convert to original scale of outcome
        ev <- exp(ev) %>%
          mutate(mod = range.mod[[y]])
        
        return(ev)
        
      } else{
        ev <- ev %>%
          mutate(mod = range.mod[[y]])
        
        return(ev)
      }
    })
    
    # add set identifier
    sim.out <- sim.out.sub %>%
      mutate(setx = range.key[[x]])
    
    return(sim.out)
  })
  
  # # check
  # glimpse(sim.out.employ.zcta)
  
  # save
  save(sim.out.employ.zcta, 
       file = paste(RESULTS, "cces-sim-out-employ-zcta.RData", sep = ""))
  
} else {
  
  # load
  load(file = paste(RESULTS, "cces-sim-out-employ-zcta.RData", sep = ""))
  
}

# calculate DiD when moving from 1 s.d. below to above mean
did.out.employ.zcta <- sim.out.employ.zcta %>%
  gather(draw, value, -setx, -mod) %>%
  spread(mod, value) %>%
  mutate(fd = `1` - `0`) %>%
  dplyr::select(setx, draw, fd) %>%
  spread(setx, fd) %>%
  rename(minus_one_sd = 2, 
         plus_one_sd = 3) %>%
  mutate(did = plus_one_sd - minus_one_sd) %>%
  dplyr::summarize(mean = mean(did, na.rm = TRUE),
                   lwr = quantile(did, probs = 0.025),
                   upr = quantile(did, probs = 0.975)) %>%
  ungroup()

# compare effect size to variation in the outcome
did.out.employ.zcta # mean 0.45, lower 0.34, upr 0.57
sd(sim.df$employ_zcta_100, na.rm = TRUE) # 8.59
did.out.employ.zcta$mean/sd(sim.df$employ_zcta_100, na.rm = TRUE) # 0.05 s.d.


################################################################################
## Simulate effects: Substantive effect of exposure on ZCTA median household income
################################################################################
# 1 sd above/below mean
mean.cn.ug <- mean(df.stay.12.14$log_china_undergraduate, na.rm = TRUE)
mean.cn.ug

sd.cn.ug <- sd(df.stay.12.14$log_china_undergraduate, na.rm = TRUE)
sd.cn.ug

range.key <- c(mean.cn.ug - sd.cn.ug, mean.cn.ug + sd.cn.ug)
range.key
exp(range.key)

# range of moderator
range.mod <- sort(unique(df.stay.12.14$time3))
range.mod

# set parameters
sim.df <- df.stay.12.14 
fm <- did.medincome.c.lfe
change.var <- "log_china_undergraduate"
moderator <- "time3"
interaction <- "china_inter"
range.key <- range.key
range.mod <- range.mod
n.draws <- 1000
dv.log <- FALSE
range.log <- TRUE

if (first.time){
  
  # set seed for simulation
  seed <- 1234
  set.seed(seed)
  
  # load library to draw from multivariate normal
  library(MASS)
  vcov.out <- vcovHC(did.medincome.c, type = 'sss') # use the robust clustered SEs from Stata
  dim(vcov.out)
  vcov.out <- vcov.out[1:dim(vcov.out)[1], 1:dim(vcov.out)[1]]
  beta.draws <- mvrnorm(n.draws, coef(fm), vcov.out)
  dim(beta.draws)
  
  # extract FEs
  fe <- getfe(fm)
  fe.matrix <- t(fe[c("effect")])
  row.names(fe.matrix) <- NULL
  fe.matrix <- fe.matrix[rep(1:nrow(fe.matrix),
                             each = n.draws),]
  
  # combine with betas
  beta.draws <- cbind(beta.draws, fe.matrix)
  
  # extract model matrix
  model.df <- as.data.frame(fm$X)
  
  # combine with FEs
  fe.df <- bind_rows(fm$fe)
  model.df.full <- cbind(model.df, fe.df)
  
  # create dummies for FEs
  model.df.full <- model.df.full %>%
    mutate(new = 1,
           temp = row_number()) %>%
    spread(zcta_10, new, fill = 0) %>%
    dplyr::select(-temp)
  
  # for each value of change var
  sim.out.medincome.zcta <- map_df(1:length(range.key), function(x){
    
    sim.out.sub <- map_df(1:length(range.mod), function(y){
      # print
      cat(paste("Simulating predicted probabilities for ", x, " of ",
                length(range.key), " ", change.var, " values when ", moderator,
                " = ", range.mod[[y]],
                "\n", sep = ""))
      
      # create temp data
      newd <- model.df.full
      
      if(range.log){
        # replace change var value x
        newd[,change.var] <- range.key[[x]]
        
        # replace moderator value y
        newd[,moderator] <- range.mod[[y]]
        
        # calculate new interaction
        newd[,interaction] <- range.key[[x]]*range.mod[[y]]
        
      } else{
        # replace change var value x
        newd[,change.var] <- log(range.key[[x]] + 1)
        
        # replace moderator value y
        newd[,moderator] <- range.mod[[y]]
        
        # calculate new interaction
        newd[,interaction] <- log(range.key[[x]] + 1)*range.mod[[y]]
        
      }
      
      # matrix multiplication for systematic component
      sys.com <- as.matrix(newd) %*% t(beta.draws)
      
      # average over observations for each draw
      ev <- as.data.frame(sys.com) %>%
        summarise_all(funs(mean))
      
      if(dv.log){
        # convert to original scale of outcome
        ev <- exp(ev) %>%
          mutate(mod = range.mod[[y]])
        
        return(ev)
        
      } else{
        ev <- ev %>%
          mutate(mod = range.mod[[y]])
        
        return(ev)
      }
    })
    
    # add set identifier
    sim.out <- sim.out.sub %>%
      mutate(setx = range.key[[x]])
    
    return(sim.out)
  })
  
  # # check
  # glimpse(sim.out.medincome.zcta)
  
  # save
  save(sim.out.medincome.zcta, 
       file = paste(RESULTS, "cces-sim-out-medincome-zcta.RData", sep = ""))
  
} else {
  
  # load
  load(file = paste(RESULTS, "cces-sim-out-medincome-zcta.RData", sep = ""))
  
}

# calculate DiD when moving from 1 s.d. below to above mean
did.out.medincome.zcta <- sim.out.medincome.zcta %>%
  gather(draw, value, -setx, -mod) %>%
  spread(mod, value) %>%
  mutate(fd = `1` - `0`) %>%
  dplyr::select(setx, draw, fd) %>%
  spread(setx, fd) %>%
  rename(minus_one_sd = 2, 
         plus_one_sd = 3) %>%
  mutate(did = plus_one_sd - minus_one_sd) %>%
  dplyr::summarize(mean = mean(did, na.rm = TRUE),
                   lwr = quantile(did, probs = 0.025),
                   upr = quantile(did, probs = 0.975)) %>%
  ungroup()

# compare effect size to variation in the outcome
did.out.medincome.zcta # mean 0.06, lower 0.04, upr 0.08
did.out.medincome.zcta * 10000 # unit is $10,000, mean $603, lower $401, upr $803
sd(sim.df$median_household_income_zcta, na.rm = TRUE) # 2.29
did.out.medincome.zcta$mean/sd(sim.df$median_household_income_zcta, na.rm = TRUE) # 0.03 s.d.


################################################################################
## Simulate effects: Substantive effect of exposure on ZCTA number of business establishments
################################################################################
# 1 sd above/below mean
mean.cn.ug <- mean(df.stay.12.14$log_china_undergraduate, na.rm = TRUE)
mean.cn.ug
exp(mean.cn.ug)

sd.cn.ug <- sd(df.stay.12.14$log_china_undergraduate, na.rm = TRUE)
sd.cn.ug
exp(sd.cn.ug)

range.key <- c(mean.cn.ug - sd.cn.ug, mean.cn.ug + sd.cn.ug)
range.key
exp(range.key)

# range of moderator
range.mod <- sort(unique(df.stay.12.14$time3))
range.mod

# set parameters
sim.df <- df.stay.12.14 
fm <- did.establish.c.lfe
change.var <- "log_china_undergraduate"
moderator <- "time3"
interaction <- "china_inter"
range.key <- range.key
range.mod <- range.mod
n.draws <- 1000
dv.log <- TRUE
range.log <- TRUE

if (first.time){ 
  
  # set seed for simulation
  seed <- 1234
  set.seed(seed)
  
  # load library to draw from multivariate normal
  library(MASS)
  vcov.out <- vcovHC(did.establish.c, type = 'sss') # use the robust clustered SEs from Stata
  dim(vcov.out)
  vcov.out <- vcov.out[1:dim(vcov.out)[1], 1:dim(vcov.out)[1]]
  beta.draws <- mvrnorm(n.draws, coef(fm), vcov.out)
  dim(beta.draws)
  
  # extract FEs
  fe <- getfe(fm)
  fe.matrix <- t(fe[c("effect")])
  row.names(fe.matrix) <- NULL
  fe.matrix <- fe.matrix[rep(1:nrow(fe.matrix),
                             each = n.draws),]
  
  # combine with betas
  beta.draws <- cbind(beta.draws, fe.matrix)
  
  # extract model matrix
  model.df <- as.data.frame(fm$X)
  
  # combine with FEs
  fe.df <- bind_rows(fm$fe)
  model.df.full <- cbind(model.df, fe.df)
  
  # create dummies for FEs
  model.df.full <- model.df.full %>%
    mutate(new = 1,
           temp = row_number()) %>%
    spread(zcta_10, new, fill = 0) %>%
    dplyr::select(-temp)
  
  # for each value of change var
  sim.out.establish.zcta <- map_df(1:length(range.key), function(x){
    
    sim.out.sub <- map_df(1:length(range.mod), function(y){
      # print
      cat(paste("Simulating predicted probabilities for ", x, " of ",
                length(range.key), " ", change.var, " values when ", moderator,
                " = ", range.mod[[y]],
                "\n", sep = ""))
      
      # create temp data
      newd <- model.df.full
      
      if(range.log){
        # replace change var value x
        newd[,change.var] <- range.key[[x]]
        
        # replace moderator value y
        newd[,moderator] <- range.mod[[y]]
        
        # calculate new interaction
        newd[,interaction] <- range.key[[x]]*range.mod[[y]]
        
      } else{
        # replace change var value x
        newd[,change.var] <- log(range.key[[x]] + 1)
        
        # replace moderator value y
        newd[,moderator] <- range.mod[[y]]
        
        # calculate new interaction
        newd[,interaction] <- log(range.key[[x]] + 1)*range.mod[[y]]
        
      }
      
      # matrix multiplication for systematic component
      sys.com <- as.matrix(newd) %*% t(beta.draws)
      
      # average over observations for each draw
      ev <- as.data.frame(sys.com) %>%
        summarise_all(funs(mean))
      
      if(dv.log){
        # convert to original scale of outcome
        ev <- exp(ev) %>%
          mutate(mod = range.mod[[y]])
        
        return(ev)
        
      } else{
        ev <- ev %>%
          mutate(mod = range.mod[[y]])
        
        return(ev)
      }
    })
    
    # add set identifier
    sim.out <- sim.out.sub %>%
      mutate(setx = range.key[[x]])
    
    return(sim.out)
  })
  
  # # check
  # glimpse(sim.out.establish.zcta)
  
  # save
  save(sim.out.establish.zcta, 
       file = paste(RESULTS, "cces-sim-out-establish-zcta.RData", sep = ""))
  
} else {
  
  # load
  load(file = paste(RESULTS, "cces-sim-out-establish-zcta.RData", sep = ""))  
  
}

# calculate DiD when moving from 1 s.d. below to above mean
did.out.establish.zcta <- sim.out.establish.zcta %>%
  gather(draw, value, -setx, -mod) %>%
  spread(mod, value) %>%
  mutate(fd = `1` - `0`) %>%
  dplyr::select(setx, draw, fd) %>%
  spread(setx, fd) %>%
  rename(minus_one_sd = 2, 
         plus_one_sd = 3) %>%
  mutate(did = plus_one_sd - minus_one_sd) %>%
  dplyr::summarize(mean = mean(did, na.rm = TRUE),
                   lwr = quantile(did, probs = 0.025),
                   upr = quantile(did, probs = 0.975)) %>%
  ungroup()

# compare effect size to variation in the outcome (non-logged)
did.out.establish.zcta # mean 2.02, lower 0.7, upr 4.12
sd(sim.df$n_establish_zcta, na.rm = TRUE) # 546.12
did.out.establish.zcta$mean/sd(sim.df$n_establish_zcta, na.rm = TRUE) # 0.004 s.d.


################################################################################
## Simulate effects: Substantive effect of exposure on ZCTA number of new vehicle registrations
################################################################################
# 1 sd above/below mean
mean.cn.ug <- mean(df.stay.12.14$log_china_undergraduate, na.rm = TRUE)
mean.cn.ug
exp(mean.cn.ug)

sd.cn.ug <- sd(df.stay.12.14$log_china_undergraduate, na.rm = TRUE)
sd.cn.ug
exp(sd.cn.ug)

range.key <- c(mean.cn.ug - sd.cn.ug, mean.cn.ug + sd.cn.ug)
range.key
exp(range.key)

# range of moderator
range.mod <- sort(unique(df.stay.12.14$time3))
range.mod

# set parameters
sim.df <- df.stay.12.14 
fm <- did.vehicle.out$did.vehicle.c.lfe
change.var <- "log_china_undergraduate"
moderator <- "time3"
interaction <- "china_inter"
range.key <- range.key
range.mod <- range.mod
n.draws <- 1000
dv.log <- TRUE
range.log <- TRUE

if (first.time){ 
  
  # set seed for simulation
  seed <- 1234
  set.seed(seed)
  
  # load library to draw from multivariate normal
  library(MASS)
  vcov.out <- vcovHC(did.vehicle.out$did.vehicle.c, type = 'sss') # use the robust clustered SEs from Stata
  dim(vcov.out)
  vcov.out <- vcov.out[1:dim(vcov.out)[1], 1:dim(vcov.out)[1]]
  beta.draws <- mvrnorm(n.draws, coef(fm), vcov.out)
  dim(beta.draws)
  
  # extract FEs
  fe <- getfe(fm)
  fe.matrix <- t(fe[c("effect")])
  row.names(fe.matrix) <- NULL
  fe.matrix <- fe.matrix[rep(1:nrow(fe.matrix),
                             each = n.draws),]
  
  # combine with betas
  beta.draws <- cbind(beta.draws, fe.matrix)
  
  # extract model matrix
  model.df <- as.data.frame(fm$X)
  
  # combine with FEs
  fe.df <- bind_rows(fm$fe)
  model.df.full <- cbind(model.df, fe.df)
  
  # create dummies for FEs
  model.df.full <- model.df.full %>%
    mutate(new = 1,
           temp = row_number()) %>%
    spread(zcta_10, new, fill = 0) %>%
    dplyr::select(-temp)
  
  # for each value of change var
  sim.out.vehicle.zcta <- map_df(1:length(range.key), function(x){
    
    sim.out.sub <- map_df(1:length(range.mod), function(y){
      # print
      cat(paste("Simulating predicted probabilities for ", x, " of ",
                length(range.key), " ", change.var, " values when ", moderator,
                " = ", range.mod[[y]],
                "\n", sep = ""))
      
      # create temp data
      newd <- model.df.full
      
      if(range.log){
        # replace change var value x
        newd[,change.var] <- range.key[[x]]
        
        # replace moderator value y
        newd[,moderator] <- range.mod[[y]]
        
        # calculate new interaction
        newd[,interaction] <- range.key[[x]]*range.mod[[y]]
        
      } else{
        # replace change var value x
        newd[,change.var] <- log(range.key[[x]] + 1)
        
        # replace moderator value y
        newd[,moderator] <- range.mod[[y]]
        
        # calculate new interaction
        newd[,interaction] <- log(range.key[[x]] + 1)*range.mod[[y]]
        
      }
      
      # matrix multiplication for systematic component
      sys.com <- as.matrix(newd) %*% t(beta.draws)
      
      # average over observations for each draw
      ev <- as.data.frame(sys.com) %>%
        summarise_all(funs(mean))
      
      if(dv.log){
        # convert to original scale of outcome
        ev <- exp(ev) %>%
          mutate(mod = range.mod[[y]])
        
        return(ev)
        
      } else{
        ev <- ev %>%
          mutate(mod = range.mod[[y]])
        
        return(ev)
      }
    })
    
    # add set identifier
    sim.out <- sim.out.sub %>%
      mutate(setx = range.key[[x]])
    
    return(sim.out)
  })
  
  # # check
  # glimpse(sim.out.vehicle.zcta)
  
  # save
  save(sim.out.vehicle.zcta, 
       file = paste(RESULTS, "cces-sim-out-vehicle-zcta.RData", sep = ""))
  
} else {
  
  # load
  load(file = paste(RESULTS, "cces-sim-out-vehicle-zcta.RData", sep = ""))
  
}

# calculate DiD when moving from 1 s.d. below to above mean
did.out.vehicle.zcta <- sim.out.vehicle.zcta %>%
  gather(draw, value, -setx, -mod) %>%
  spread(mod, value) %>%
  mutate(fd = `1` - `0`) %>%
  dplyr::select(setx, draw, fd) %>%
  spread(setx, fd) %>%
  rename(minus_one_sd = 2, 
         plus_one_sd = 3) %>%
  mutate(did = plus_one_sd - minus_one_sd) %>%
  dplyr::summarize(mean = mean(did, na.rm = TRUE),
                   lwr = quantile(did, probs = 0.025),
                   upr = quantile(did, probs = 0.975)) %>%
  ungroup()

# compare effect size to variation in the outcome (non-logged)
did.out.vehicle.zcta # mean 10.26, lower 7.74, upr 13.24
#sd(sim.df$new_vehicle_total, na.rm = TRUE) # 733.5656
#did.out.vehicle.zcta$mean/sd(sim.df$new_vehicle_total, na.rm = TRUE) # 0.01 s.d.
did.out.vehicle.zcta$mean/733.5656 # 0.01 s.d.


################################################################################
## Figure 1a
################################################################################
# set parameters
alpha <- 0.05
y.limits <- c(-1.2, 1.2)

# extract coefs and clustered SEs
coef.df.1a <- tibble(model = c("Concurrent\nCHN Undergrad", "2012\nCHN Undergrad"),
                     coef = c(did.1.a.out[,1]["china_inter"],
                              did.1.b.out[,1]["suitability2012_inter"]),
                     cse = c(did.1.a.out[,2]["china_inter"],
                             did.1.b.out[,2]["suitability2012_inter"]))

# calculate C.I.
coef.df.1a <- coef.df.1a %>%
  mutate(multiplier = qnorm(1 - alpha/2),
         lwr = coef - multiplier*cse,
         upr = coef + multiplier*cse,
         model = factor(model, levels = c("Concurrent\nCHN Undergrad",
                                          "2012\nCHN Undergrad")))
coef.df.1a

# df for annotations
txt.shift.1a <- 0.15
txt.pos.1a <- tibble(model = coef.df.1a$model,
                     coef = c(coef.df.1a[1,]$lwr - txt.shift.1a,
                              coef.df.1a[2,]$lwr - txt.shift.1a),
                     lwr = coef.df.1a$lwr,
                     upr = coef.df.1a$upr)
                         

# plot
axis.title.size <- 16

p.coef.1a <- ggplot(data = coef.df.1a,
                           aes(x = model, y = coef,
                               ymin = lwr, 
                               ymax = upr)) +
  geom_pointrange(size = 0.8) +
  geom_hline(yintercept = 0,
             color = I(hsv(0/12, 7/12, 7/12)), 
             linetype = "dashed") +
  geom_text(data = txt.pos.1a,
            aes(label = model),
            size = axis.title.size - 11) +
  scale_x_discrete("") +
  scale_y_continuous("\nDiD Effect of CHN Int'l UG Students (log)",
                     limits = y.limits) +
  theme_bw() +
  ggtitle("CCES Panel Data\n2012-2014 Anti-immigration Attitudes") +
  theme(legend.position = "none",
        plot.title = element_text(size = axis.title.size + 4,
                                  face = "bold",
                                  hjust = 0.5,
                                  margin = margin(0, 0, 20, 0)),
        plot.margin = margin(0.1, 0.5, 0.1, 0.1, "cm"),
        axis.title.y = element_text(size = axis.title.size,
                                    margin = margin(0, 20, 0, 0)),
        axis.text.y = element_text(size = axis.title.size),
        strip.background = element_blank(),
        axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        strip.text = element_text(size = axis.title.size + 4,
                                  margin = margin(20, 0, 20, 0)),
        panel.grid = element_blank()) 

p.coef.1a

# save
pdf(file = paste(FIG_DIR, "Figure-1a.pdf", sep = "/"),
    width = 6.5, height = 5.8)
print(p.coef.1a)
dev.off()

# ggsave(file = paste(FIG_DIR, "coef-cces-main.eps", sep = "/"),
#        width = 6.5, height = 5.8)


################################################################################
## Figure 2a: Homeownership
################################################################################
# get coef and C.I.s
coef.df.nonhomeowners <- tibble(var = "Non-Homeowners",
                                coef = did.out.nonhomeowner.zcta$mean,
                                lwr = did.out.nonhomeowner.zcta$lwr,
                                upr = did.out.nonhomeowner.zcta$upr)

coef.df.homeowners <- tibble(var = "Homeowners",
                             coef = did.out.homeowner.zcta$mean,
                             lwr = did.out.homeowner.zcta$lwr,
                             upr = did.out.homeowner.zcta$upr)

# combine
coef.ho <- rbind(coef.df.nonhomeowners, 
                 coef.df.homeowners)

coef.ho <- coef.ho %>%
  mutate(var = factor(var, levels = c("Non-Homeowners", 
                                      "Homeowners")))

# df for annotations
txt.shift.ho <- 1
txt.pos.ho <- tibble(var = coef.ho$var,
                     coef = c(coef.ho[1,]$lwr - txt.shift.ho,
                              coef.ho[2,]$lwr - txt.shift.ho),
                     lwr = coef.ho$lwr,
                     upr = coef.ho$upr)

# plot
axis.title.size <- 16

p.coef.ho <- ggplot(data = coef.ho,
                    aes(x = var, y = coef,
                        ymin = lwr, 
                        ymax = upr)) +
  geom_pointrange(size = 0.8) +
  geom_hline(yintercept = 0,
             color = I(hsv(0/12, 7/12, 7/12)), 
             linetype = "dashed") +
  geom_text(data = txt.pos.ho,
            aes(label = var),
            size = axis.title.size - 11.5) +
  scale_x_discrete("") +
  scale_y_continuous(expression(paste("\n", Delta, " in 2012-2014 Anti-immigration Attitudes (p.p.)", sep = "")),
                     breaks = seq(-15, 15, 5),
                     labels = seq(-15, 15, 5),
                     limits = c(-17, 17)
                     ) +
  theme_bw() +  
  ggtitle("CCES Respondents") +
  theme(legend.position = "none",
        plot.title = element_text(size = axis.title.size + 4,
                                  face = "bold",
                                  hjust = 0.5,
                                  margin = margin(0, 0, 20, 0)),
        plot.margin = margin(0.1, 0.5, 0.1, 0.1, "cm"),
        axis.title.y = element_text(size = axis.title.size + 2,
                                    margin = margin(0, 20, 0, 0)),
        axis.text.y = element_text(size = axis.title.size),
        strip.background = element_blank(),
        axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        strip.text = element_text(size = axis.title.size + 4,
                                  margin = margin(20, 0, 20, 0)),
        panel.grid = element_blank()) 

p.coef.ho

# save
pdf(file = paste(FIG_DIR, "Figure-2a.pdf", sep = "/"),
    width = 4.1, height = 5.8)
print(p.coef.ho)
dev.off()

# ggsave(file = paste(FIG_DIR, "Figure-2a.eps", sep = "/"),
#        width = 4.1, height = 5.8)


################################################################################
## Figure 2b: Employment Rate
################################################################################
# get coef and C.I.s
coef.df.employ <- tibble(var = "Employment Rate",
                         coef = did.out.employ.zcta$mean,
                         lwr = did.out.employ.zcta$lwr,
                         upr = did.out.employ.zcta$upr)
coef.df.employ

# plot
axis.title.size <- 16

p.coef.employ <- ggplot(data = coef.df.employ,
                    aes(x = var, y = coef,
                        ymin = lwr, 
                        ymax = upr)) +
  geom_pointrange(size = 0.8) +
  geom_hline(yintercept = 0,
             color = I(hsv(0/12, 7/12, 7/12)), 
             linetype = "dashed") +
  scale_x_discrete("") +
  scale_y_continuous(expression(paste("\n", Delta, " in 2012-2014 Employment Rate (p.p.)", sep = "")),
                     limit = c(-0.6, 0.6)) +
  theme_bw() +  
  ggtitle("CCES ZIP Codes") +
  theme(legend.position = "none",
        plot.title = element_text(size = axis.title.size + 4,
                                  face = "bold",
                                  hjust = 0.5,
                                  margin = margin(0, 0, 20, 0)),
        plot.margin = margin(0.1, 0.5, 0.1, 0.1, "cm"),
        axis.title.y = element_text(size = axis.title.size + 2,
                                    margin = margin(0, 20, 0, 0)),
        axis.text.y = element_text(size = axis.title.size),
        strip.background = element_blank(),
        axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        strip.text = element_text(size = axis.title.size + 4,
                                  margin = margin(20, 0, 20, 0)),
        panel.grid = element_blank()) 

p.coef.employ

# save
pdf(file = paste(FIG_DIR, "Figure-2b.pdf", sep = "/"),
    width = 4, height = 5.8)
print(p.coef.employ)
dev.off()

# ggsave(file = paste(FIG_DIR, "Figure-2b.eps", sep = "/"),
#        width = 4, height = 5.8)


################################################################################
## Figure 2c: Median Household Income
################################################################################
# get coef and C.I.s
coef.df.medincome <- tibble(var = "Median Household Income",
                            coef = did.out.medincome.zcta$mean * 10000,
                            lwr = did.out.medincome.zcta$lwr * 10000,
                            upr = did.out.medincome.zcta$upr * 10000)
coef.df.medincome

# plot
axis.title.size <- 16

p.coef.medincome <- ggplot(data = coef.df.medincome,
                        aes(x = var, y = coef,
                            ymin = lwr, 
                            ymax = upr)) +
  geom_pointrange(size = 0.8) +
  geom_hline(yintercept = 0,
             color = I(hsv(0/12, 7/12, 7/12)), 
             linetype = "dashed") +
  scale_x_discrete("") +
  scale_y_continuous(expression(paste("\n", Delta, " in 2012-2014 Median Household Income ($)", sep = "")),
                     breaks = seq(-800, 800, 400),
                     label = seq(-800, 800, 400),
                     limit = c(-803, 803)) +
  theme_bw() +  
  ggtitle("CCES ZIP Codes") +
  theme(legend.position = "none",
        plot.title = element_text(size = axis.title.size + 4,
                                  face = "bold",
                                  hjust = 0.5,
                                  margin = margin(0, 0, 20, 0)),
        plot.margin = margin(0.1, 0.5, 0.1, 0.1, "cm"),
        axis.title.y = element_text(size = axis.title.size + 2,
                                    margin = margin(0, 20, 0, 0)),
        axis.text.y = element_text(size = axis.title.size),
        strip.background = element_blank(),
        axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        strip.text = element_text(size = axis.title.size + 4,
                                  margin = margin(20, 0, 20, 0)),
        panel.grid = element_blank()) 

p.coef.medincome

# save
pdf(file = paste(FIG_DIR, "Figure-2c.pdf", sep = "/"),
    width = 4, height = 5.8)
print(p.coef.medincome)
dev.off()

# ggsave(file = paste(FIG_DIR, "Figure-2c.eps", sep = "/"),
#        width = 4, height = 5.8)


################################################################################
## Figure 2d: Business Establishments
################################################################################
# get coef and C.I.s
coef.df.establish <- tibble(var = "Business Establishments",
                            coef = did.out.establish.zcta$mean,
                            lwr = did.out.establish.zcta$lwr,
                            upr = did.out.establish.zcta$upr)
coef.df.establish

# plot
axis.title.size <- 16

p.coef.establish <- ggplot(data = coef.df.establish,
                           aes(x = var, y = coef,
                               ymin = lwr, 
                               ymax = upr)) +
  geom_pointrange(size = 0.8) +
  geom_hline(yintercept = 0,
             color = I(hsv(0/12, 7/12, 7/12)), 
             linetype = "dashed") +
  scale_x_discrete("") +
  scale_y_continuous(expression(paste("\n", Delta, " in 2012-2014 # of Business Establishments", sep = "")),
                     breaks = seq(-4, 4, 2),
                     labels = seq(-4, 4, 2),
                     limit = c(-4.5, 4.5)) +
  theme_bw() +  
  ggtitle("CCES ZIP Codes") +
  theme(legend.position = "none",
        plot.title = element_text(size = axis.title.size + 4,
                                  face = "bold",
                                  hjust = 0.5,
                                  margin = margin(0, 0, 20, 0)),
        plot.margin = margin(0.1, 0.5, 0.1, 0.1, "cm"),
        axis.title.y = element_text(size = axis.title.size + 2,
                                    margin = margin(0, 20, 0, 0)),
        axis.text.y = element_text(size = axis.title.size),
        strip.background = element_blank(),
        axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        strip.text = element_text(size = axis.title.size + 4,
                                  margin = margin(20, 0, 20, 0)),
        panel.grid = element_blank()) 

p.coef.establish

# save
pdf(file = paste(FIG_DIR, "Figure-2d.pdf", sep = "/"),
    width = 4, height = 5.8)
print(p.coef.establish)
dev.off()

# ggsave(file = paste(FIG_DIR, "Figure-2d.eps", sep = "/"),
#        width = 4, height = 5.8)


################################################################################
## Figure 2e: New Vehicle Registration
################################################################################
# get coef and C.I.s
coef.df.vehicle <- tibble(var = "New Vehicle Registrations",
                          coef = did.out.vehicle.zcta$mean,
                          lwr = did.out.vehicle.zcta$lwr,
                          upr = did.out.vehicle.zcta$upr)
coef.df.vehicle

# plot
axis.title.size <- 16

p.coef.vehicle <- ggplot(data = coef.df.vehicle,
                         aes(x = var, y = coef,
                             ymin = lwr, 
                             ymax = upr)) +
  geom_pointrange(size = 0.8) +
  geom_hline(yintercept = 0,
             color = I(hsv(0/12, 7/12, 7/12)), 
             linetype = "dashed") +
  scale_x_discrete("") +
  scale_y_continuous(expression(paste("\n", Delta, " in 2012-2014 # of New Vehicle Registrations", sep = "")),
                     breaks = seq(-15, 15, 5),
                     labels = seq(-15, 15, 5),
                     limit = c(-15, 15)
                     ) +
  theme_bw() +  
  ggtitle("CCES ZIP Codes") +
  theme(legend.position = "none",
        plot.title = element_text(size = axis.title.size + 4,
                                  face = "bold",
                                  hjust = 0.5,
                                  margin = margin(0, 0, 20, 0)),
        plot.margin = margin(0.1, 0.5, 0.1, 0.1, "cm"),
        axis.title.y = element_text(size = axis.title.size + 2,
                                    margin = margin(0, 20, 0, 0)),
        axis.text.y = element_text(size = axis.title.size),
        strip.background = element_blank(),
        axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        strip.text = element_text(size = axis.title.size + 4,
                                  margin = margin(20, 0, 20, 0)),
        panel.grid = element_blank()) 

p.coef.vehicle

# save
pdf(file = paste(FIG_DIR, "Figure-2e.pdf", sep = "/"),
    width = 4, height = 5.8)
print(p.coef.vehicle)
dev.off()

# ggsave(file = paste(FIG_DIR, "Figure-2e.eps", sep = "/"),
#        width = 4, height = 5.8)

# combine all panels in Figure 2
pdf(file = paste(FIG_DIR, "Figure-2.pdf", sep = "/"),
    width = 20, height = 5.85)
grid.arrange(p.coef.ho, p.coef.employ, p.coef.medincome, p.coef.establish, p.coef.vehicle, 
             ncol = 5, nrow = 1)
dev.off()